Enable light-weight shadow modes (especially shadow_mode_log_dirty).
Light-weight shadows keep all page ref counts based on the guest's own p.t. pages,
while heavy-weight shadows base all their ref counts on the shadow's p.t. pages.
shadow_mode_refcounts(dom) == 1 implies heavy-weight shadows.
int audit_adjust_pgtables(struct domain *d, int dir, int noisy)
{
int errors = 0;
- int shadow_enabled = shadow_mode_enabled(d) ? 1 : 0;
+ int shadow_refcounts = !!shadow_mode_refcounts(d);
+ int shadow_enabled = !!shadow_mode_enabled(d);
int l2limit;
void _adjust(struct pfn_info *page, int adjtype ADJUST_EXTRA_ARGS)
page->count_info += dir;
}
- void adjust_l2_page(unsigned long mfn)
+ void adjust_l2_page(unsigned long mfn, int shadow)
{
unsigned long *pt = map_domain_mem(mfn << PAGE_SHIFT);
int i;
if ( noisy )
{
- if ( shadow_enabled )
+ if ( shadow )
{
if ( page_get_owner(l1page) != NULL )
{
errors++;
continue;
}
+
+ u32 page_type = l1page->u.inuse.type_info & PGT_type_mask;
+
+ if ( page_type != PGT_l1_shadow )
+ {
+ printk("Audit %d: [Shadow L2 mfn=%lx i=%x] "
+ "Expected Shadow L1 t=%x mfn=%lx\n",
+ d->id, mfn, i,
+ l1page->u.inuse.type_info, l1mfn);
+ errors++;
+ }
}
else
{
"belonging to other dom %p (id=%d)\n",
l1mfn,
page_get_owner(l1page),
- page_get_owner(l1page)->id);
+ (page_get_owner(l1page)
+ ? page_get_owner(l1page)->id
+ : -1));
errors++;
continue;
}
}
}
- adjust(l1page, !shadow_enabled);
+ adjust(l1page, !shadow);
}
}
errors++;
}
- if ( shadow_enabled &&
+ if ( shadow_refcounts &&
page_is_page_table(gpage) &&
! page_out_of_sync(gpage) )
{
break;
case PGT_l1_shadow:
adjust(pfn_to_page(gmfn), 0);
- adjust_l1_page(smfn);
+ if ( shadow_refcounts )
+ adjust_l1_page(smfn);
if ( page->u.inuse.type_info & PGT_pinned )
adjust(page, 0);
break;
case PGT_hl2_shadow:
adjust(pfn_to_page(gmfn), 0);
- adjust_hl2_page(smfn);
+ if ( shadow_refcounts )
+ adjust_hl2_page(smfn);
if ( page->u.inuse.type_info & PGT_pinned )
adjust(page, 0);
break;
case PGT_l2_shadow:
adjust(pfn_to_page(gmfn), 0);
- adjust_l2_page(smfn);
+ adjust_l2_page(smfn, 1);
if ( page->u.inuse.type_info & PGT_pinned )
adjust(page, 0);
break;
struct exec_domain *ed;
for_each_exec_domain(d, ed)
- {
- if ( !shadow_enabled )
- {
- if ( pagetable_val(ed->arch.guest_table) )
- adjust(&frame_table[pagetable_val(ed->arch.guest_table)
- >> PAGE_SHIFT], 1);
- }
- else
- {
- if ( pagetable_val(ed->arch.guest_table) )
- adjust(&frame_table[pagetable_val(ed->arch.guest_table)
- >> PAGE_SHIFT], 0);
- if ( pagetable_val(ed->arch.shadow_table) )
- adjust(&frame_table[pagetable_val(ed->arch.shadow_table)
- >> PAGE_SHIFT], 0);
- if ( ed->arch.monitor_shadow_ref )
- adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
- }
- }
+ {
+ if ( pagetable_val(ed->arch.guest_table) )
+ adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 1);
+ if ( pagetable_val(ed->arch.shadow_table) )
+ adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 0);
+ if ( ed->arch.monitor_shadow_ref )
+ adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
+ }
}
void adjust_guest_pages()
{
struct list_head *list_ent = d->page_list.next;
struct pfn_info *page;
- unsigned long mfn;
+ unsigned long mfn, snapshot_mfn;
while ( list_ent != &d->page_list )
{
u32 page_type;
page = list_entry(list_ent, struct pfn_info, list);
- mfn = page_to_pfn(page);
+ snapshot_mfn = mfn = page_to_pfn(page);
page_type = page->u.inuse.type_info & PGT_type_mask;
BUG_ON(page_get_owner(page) != d);
page_count++;
+ if ( shadow_enabled && !shadow_refcounts &&
+ page_out_of_sync(page) )
+ {
+ unsigned long gpfn = __mfn_to_gpfn(d, mfn);
+ ASSERT( VALID_M2P(gpfn) );
+ snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
+ ASSERT( snapshot_mfn );
+ }
+
switch ( page_type )
{
case PGT_l2_page_table:
if ( noisy )
{
- if ( shadow_enabled )
+ if ( shadow_refcounts )
{
printk("Audit %d: found an L2 guest page "
"mfn=%lx t=%08x c=%08x while in shadow mode\n",
errors++;
}
- if ( (page->u.inuse.type_info & PGT_validated) !=
- PGT_validated )
+ if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
{
- printk("Audit %d: L2 mfn=%lx not validated %08x\n",
- d->id, mfn, page->u.inuse.type_info);
- errors++;
- }
+ if ( (page->u.inuse.type_info & PGT_validated) !=
+ PGT_validated )
+ {
+ printk("Audit %d: L2 mfn=%lx not validated %08x\n",
+ d->id, mfn, page->u.inuse.type_info);
+ errors++;
+ }
- if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
- {
- printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
- d->id, mfn, page->u.inuse.type_info);
- errors++;
+ if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
+ {
+ printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
+ d->id, mfn, page->u.inuse.type_info);
+ errors++;
+ }
}
}
adjust(page, 1);
if ( page->u.inuse.type_info & PGT_validated )
- adjust_l2_page(mfn);
+ adjust_l2_page(snapshot_mfn, 0);
break;
if ( noisy )
{
- if ( shadow_enabled )
+ if ( shadow_refcounts )
{
printk("found an L1 guest page mfn=%lx t=%08x c=%08x "
"while in shadow mode\n",
errors++;
}
- if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated )
- {
- printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
- d->id, mfn, page->u.inuse.type_info);
- errors++;
- }
-
- if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
+ if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
{
- if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
+ if ( (page->u.inuse.type_info & PGT_validated) !=
+ PGT_validated )
{
- printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
+ printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
d->id, mfn, page->u.inuse.type_info);
errors++;
}
+
+ if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
+ {
+ if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
+ {
+ printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
+ d->id, mfn, page->u.inuse.type_info);
+ }
+ }
}
}
adjust(page, 1);
if ( page->u.inuse.type_info & PGT_validated )
- adjust_l1_page(mfn);
+ adjust_l1_page(snapshot_mfn);
break;
break;
case PGT_writable_page:
- if ( shadow_enabled )
+ if ( shadow_refcounts )
{
// In shadow mode, writable pages can get pinned by
// paravirtualized guests that think they are pinning
void _audit_domain(struct domain *d, int flags)
{
+ int shadow_refcounts = !!shadow_mode_refcounts(d);
+
void scan_for_pfn_in_mfn(struct domain *d, unsigned long xmfn,
unsigned long mfn)
{
unmap_domain_mem(pt);
}
+ // Audit helper: report (via printk) any active grant-table entries of
+ // domain d that currently pin machine frame xmfn.  Purely diagnostic --
+ // matches are printed but not counted as audit errors.
+ void scan_for_pfn_in_grant_table(struct domain *d, unsigned xmfn)
+ {
+ int i;
+ active_grant_entry_t *act = d->grant_table->active;
+
+ // Hold the grant-table lock so the active array can't change under us.
+ spin_lock(&d->grant_table->lock);
+
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ {
+ if ( act[i].pin && (act[i].frame == xmfn) )
+ {
+ printk(" found active grant table entry i=%d dom=%d pin=%d\n",
+ i, act[i].domid, act[i].pin);
+ }
+ }
+
+ spin_unlock(&d->grant_table->lock);
+ }
+
void scan_for_pfn(struct domain *d, unsigned long xmfn)
{
+ scan_for_pfn_in_grant_table(d, xmfn);
+
if ( !shadow_mode_enabled(d) )
{
struct list_head *list_ent = d->page_list.next;
// Maybe we should just be using BIGLOCK?
//
- if ( !(flags & AUDIT_ALREADY_LOCKED) )
+ if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
shadow_lock(d);
spin_lock(&d->page_alloc_lock);
errors++;
}
- if ( shadow_mode_enabled(d) &&
+ if ( shadow_mode_refcounts(d) &&
(page_type == PGT_writable_page) &&
!(page->u.inuse.type_info & PGT_validated) )
{
mfn);
errors++;
}
- if ( page_type != PGT_writable_page )
+ if ( shadow_refcounts
+ ? (page_type != PGT_writable_page)
+ : !(page_type && (page_type <= PGT_l4_page_table)) )
{
printk("out of sync page mfn=%lx has strange type "
"t=%08x c=%08x\n",
d->id, page->u.inuse.type_info,
page->tlbflush_timestamp,
page->count_info, mfn);
- errors++;
+ //errors++;
}
break;
default:
page->count_info,
page->u.inuse.type_info,
page->tlbflush_timestamp, mfn );
- errors++;
+ //errors++;
scan_for_pfn_remote(mfn);
}
d->id, page_to_pfn(page),
page->u.inuse.type_info,
page->count_info);
+ printk("a->gpfn_and_flags=%p\n",
+ (void *)a->gpfn_and_flags);
errors++;
}
break;
"pages=%d oos=%d l1=%d l2=%d ctot=%d ttot=%d\n",
d->id, page_count, oos_count, l1, l2, ctot, ttot);
- if ( !(flags & AUDIT_ALREADY_LOCKED) )
+ if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
shadow_unlock(d);
if ( d != current->domain )
/* Put the domain in shadow mode even though we're going to be using
* the shared 1:1 page table initially. It shouldn't hurt */
- shadow_mode_enable(ed->domain, SHM_enable|SHM_translate|SHM_external);
+ shadow_mode_enable(ed->domain,
+ SHM_enable|SHM_refcounts|SHM_translate|SHM_external);
}
return 0;
phys_basetab = c->pt_base;
ed->arch.guest_table = mk_pagetable(phys_basetab);
- if ( shadow_mode_enabled(d) )
+ if ( shadow_mode_refcounts(d) )
{
if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) )
return -EINVAL;
{
if ( pagetable_val(ed->arch.guest_table) != 0 )
{
- (shadow_mode_enabled(d) ? put_page : put_page_and_type)
- (&frame_table[pagetable_val(
- ed->arch.guest_table) >> PAGE_SHIFT]);
+ if ( shadow_mode_refcounts(d) )
+ put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
+ else
+ put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
+
ed->arch.guest_table = mk_pagetable(0);
}
if ( pagetable_val(ed->arch.guest_table_user) != 0 )
{
- (shadow_mode_enabled(d) ? put_page : put_page_and_type)
- (&frame_table[pagetable_val(
- ed->arch.guest_table_user) >> PAGE_SHIFT]);
+ if ( shadow_mode_refcounts(d) )
+ put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
+ else
+ put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
+
ed->arch.guest_table_user = mk_pagetable(0);
}
if ( opt_dom0_shadow || opt_dom0_translate )
{
shadow_mode_enable(d, (opt_dom0_translate
- ? SHM_enable | SHM_translate
+ ? SHM_enable | SHM_refcounts | SHM_translate
: SHM_enable));
if ( opt_dom0_translate )
{
idle_pg_table[1] = root_create_phys(pagetable_val(d->arch.phys_table),
__PAGE_HYPERVISOR);
translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT),
- pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT);
+ pagetable_get_pfn(ed->arch.guest_table));
idle_pg_table[1] = root_empty();
local_flush_tlb();
}
res = get_page_and_type(&frame_table[gmfn], d, PGT_ldt_page);
- if ( !res && unlikely(shadow_mode_enabled(d)) )
+ if ( !res && unlikely(shadow_mode_refcounts(d)) )
{
shadow_lock(d);
shadow_remove_all_write_access(d, gpfn, gmfn);
struct pfn_info *page;
unsigned long pfn;
- ASSERT( !shadow_mode_enabled(d) );
+ ASSERT( !shadow_mode_refcounts(d) );
if ( (root_get_flags(re) & _PAGE_RW) )
{
{
int rc;
- ASSERT(!shadow_mode_enabled(d));
+ ASSERT(!shadow_mode_refcounts(d));
if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
return 1;
get_page_from_l3e(
l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
{
+ ASSERT( !shadow_mode_refcounts(d) );
+
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
return 1;
{
int rc;
+ ASSERT( !shadow_mode_refcounts(d) );
+
if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
return 1;
l1_pgentry_t *pl1e;
int i;
- ASSERT(!shadow_mode_enabled(d));
+ ASSERT(!shadow_mode_refcounts(d));
pl1e = map_domain_mem(pfn << PAGE_SHIFT);
l2_pgentry_t *pl2e;
int i;
+ // See the code in shadow_promote() to understand why this is here...
if ( (PGT_base_page_table == PGT_l2_page_table) &&
- shadow_mode_enabled(d) )
+ unlikely(shadow_mode_refcounts(d)) )
return 1;
- ASSERT( !shadow_mode_enabled(d) );
+
+ ASSERT( !shadow_mode_refcounts(d) );
pl2e = map_domain_mem(pfn << PAGE_SHIFT);
l3_pgentry_t *pl3e = page_to_virt(page);
int i;
- ASSERT( !shadow_mode_enabled(d) );
+ ASSERT( !shadow_mode_refcounts(d) );
for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
if ( is_guest_l3_slot(i) &&
l4_pgentry_t *pl4e = page_to_virt(page);
int i;
+ // See the code in shadow_promote() to understand why this is here...
if ( (PGT_base_page_table == PGT_l4_page_table) &&
- shadow_mode_enabled(d) )
+ shadow_mode_refcounts(d) )
return 1;
- ASSERT( !shadow_mode_enabled(d) );
+
+ ASSERT( !shadow_mode_refcounts(d) );
for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
if ( is_guest_l4_slot(i) &&
l1_pgentry_t ol1e;
struct domain *d = current->domain;
- ASSERT( !shadow_mode_enabled(d) );
-
if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
return 0;
+ if ( unlikely(shadow_mode_refcounts(d)) )
+ return update_l1e(pl1e, ol1e, nl1e);
+
if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
{
if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
return 0;
}
-
+
put_page_from_l1e(ol1e, d);
return 1;
}
void free_page_type(struct pfn_info *page, unsigned int type)
{
struct domain *owner = page_get_owner(page);
- if ( likely(owner != NULL) && unlikely(shadow_mode_enabled(owner)) )
- return;
+ unsigned long gpfn;
+
+ if ( owner != NULL )
+ {
+ if ( unlikely(shadow_mode_refcounts(owner)) )
+ return;
+ if ( unlikely(shadow_mode_enabled(owner)) )
+ {
+ gpfn = __mfn_to_gpfn(owner, page_to_pfn(page));
+ ASSERT(VALID_M2P(gpfn));
+ remove_shadow(owner, gpfn, type);
+ }
+ }
switch ( type )
{
int okay;
unsigned long old_base_mfn;
- if ( shadow_mode_enabled(d) )
+ if ( shadow_mode_refcounts(d) )
okay = get_page_from_pagenr(mfn, d);
else
okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
{
invalidate_shadow_ldt(ed);
- old_base_mfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
+ old_base_mfn = pagetable_get_pfn(ed->arch.guest_table);
ed->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
update_pagetables(ed); /* update shadow_table and monitor_table */
write_ptbase(ed);
- if ( shadow_mode_enabled(d) )
+ if ( shadow_mode_refcounts(d) )
put_page(&frame_table[old_base_mfn]);
else
put_page_and_type(&frame_table[old_base_mfn]);
- /* CR3 holds its own ref to its shadow. */
+ /* CR3 also holds a ref to its shadow... */
if ( shadow_mode_enabled(d) )
{
if ( ed->arch.monitor_shadow_ref )
put_shadow_ref(ed->arch.monitor_shadow_ref);
ed->arch.monitor_shadow_ref =
- pagetable_val(ed->arch.monitor_table) >> PAGE_SHIFT;
+ pagetable_get_pfn(ed->arch.monitor_table);
ASSERT(!page_get_owner(&frame_table[ed->arch.monitor_shadow_ref]));
get_shadow_ref(ed->arch.monitor_shadow_ref);
}
type = PGT_l1_page_table | PGT_va_mutable;
pin_page:
- if ( shadow_mode_enabled(FOREIGNDOM) )
+ if ( shadow_mode_refcounts(FOREIGNDOM) )
type = PGT_writable_page;
okay = get_page_and_type_from_pagenr(op.mfn, type, FOREIGNDOM);
else
{
unsigned long old_mfn =
- pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT;
+ pagetable_get_pfn(ed->arch.guest_table_user);
ed->arch.guest_table_user = mk_pagetable(op.mfn << PAGE_SHIFT);
if ( old_mfn != 0 )
put_page_and_type(&frame_table[old_mfn]);
unsigned int foreigndom)
{
mmu_update_t req;
- unsigned long va = 0, mfn, prev_mfn = 0, gpfn;
+ void *va;
+ unsigned long gpfn, mfn;
struct pfn_info *page;
int rc = 0, okay = 1, i = 0, cpu = smp_processor_id();
unsigned int cmd, done = 0;
struct exec_domain *ed = current;
struct domain *d = ed->domain;
u32 type_info;
+ struct map_dom_mem_cache mapcache = MAP_DOM_MEM_CACHE_INIT;
+ struct map_dom_mem_cache sh_mapcache = MAP_DOM_MEM_CACHE_INIT;
LOCK_BIGLOCK(d);
}
cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
- mfn = req.ptr >> PAGE_SHIFT;
-
okay = 0;
switch ( cmd )
* MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
*/
case MMU_NORMAL_PT_UPDATE:
+
+ gpfn = req.ptr >> PAGE_SHIFT;
+ mfn = __gpfn_to_mfn(d, gpfn);
+
if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
{
MEM_LOG("Could not get page for normal update");
break;
}
- if ( likely(prev_mfn == mfn) )
- {
- va = (va & PAGE_MASK) | (req.ptr & ~PAGE_MASK);
- }
- else
- {
- if ( prev_mfn != 0 )
- unmap_domain_mem((void *)va);
- va = (unsigned long)map_domain_mem(req.ptr);
- prev_mfn = mfn;
- }
-
+ va = map_domain_mem_with_cache(req.ptr, &mapcache);
page = &frame_table[mfn];
+
switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
{
case PGT_l1_page_table:
- ASSERT(!shadow_mode_enabled(d));
+ ASSERT( !shadow_mode_refcounts(d) );
if ( likely(get_page_type(
page, type_info & (PGT_type_mask|PGT_va_mask))) )
{
- l1_pgentry_t pte;
+ l1_pgentry_t l1e;
/* FIXME: doesn't work with PAE */
- pte = l1e_create_phys(req.val, req.val);
- okay = mod_l1_entry((l1_pgentry_t *)va, pte);
+ l1e = l1e_create_phys(req.val, req.val);
+ okay = mod_l1_entry(va, l1e);
+ if ( okay && unlikely(shadow_mode_enabled(d)) )
+ shadow_l1_normal_pt_update(d, req.ptr, l1e, &sh_mapcache);
put_page_type(page);
}
break;
case PGT_l2_page_table:
- ASSERT(!shadow_mode_enabled(d));
+ ASSERT( !shadow_mode_refcounts(d) );
if ( likely(get_page_type(page, PGT_l2_page_table)) )
{
l2_pgentry_t l2e;
/* FIXME: doesn't work with PAE */
l2e = l2e_create_phys(req.val, req.val);
- okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn);
+ okay = mod_l2_entry(va, l2e, mfn);
+ if ( okay && unlikely(shadow_mode_enabled(d)) )
+ shadow_l2_normal_pt_update(d, req.ptr, l2e, &sh_mapcache);
put_page_type(page);
}
break;
#ifdef __x86_64__
case PGT_l3_page_table:
- ASSERT(!shadow_mode_enabled(d));
+ ASSERT( !shadow_mode_refcounts(d) );
if ( likely(get_page_type(page, PGT_l3_page_table)) )
{
l3_pgentry_t l3e;
/* FIXME: doesn't work with PAE */
l3e = l3e_create_phys(req.val,req.val);
- okay = mod_l3_entry((l3_pgentry_t *)va, l3e, mfn);
+ okay = mod_l3_entry(va, l3e, mfn);
+ if ( okay && unlikely(shadow_mode_enabled(d)) )
+ shadow_l3_normal_pt_update(d, req.ptr, l3e, &sh_mapcache);
put_page_type(page);
}
break;
case PGT_l4_page_table:
- ASSERT(!shadow_mode_enabled(d));
+ ASSERT( !shadow_mode_refcounts(d) );
if ( likely(get_page_type(page, PGT_l4_page_table)) )
{
l4_pgentry_t l4e;
l4e = l4e_create_phys(req.val,req.val);
- okay = mod_l4_entry((l4_pgentry_t *)va, l4e, mfn);
+ okay = mod_l4_entry(va, l4e, mfn);
+ if ( okay && unlikely(shadow_mode_enabled(d)) )
+ shadow_l4_normal_pt_update(d, req.ptr, l4e, &sh_mapcache);
put_page_type(page);
}
break;
if ( shadow_mode_log_dirty(d) )
__mark_dirty(d, mfn);
- gpfn = __mfn_to_gpfn(d, mfn);
- ASSERT(VALID_M2P(gpfn));
-
if ( page_is_page_table(page) &&
!page_out_of_sync(page) )
{
break;
}
+ unmap_domain_mem_with_cache(va, &mapcache);
+
put_page(page);
break;
case MMU_MACHPHYS_UPDATE:
+ mfn = req.ptr >> PAGE_SHIFT;
+ gpfn = req.val;
+
/* HACK ALERT... Need to think about this some more... */
if ( unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d)) )
{
- rc = FOREIGNDOM->next_io_page++;
- printk("privileged guest dom%d requests mfn=%lx for dom%d, "
- "gets pfn=%x\n",
- d->id, mfn, FOREIGNDOM->id, rc);
- set_machinetophys(mfn, rc);
- set_p2m_entry(FOREIGNDOM, rc, mfn);
+ shadow_lock(FOREIGNDOM);
+ printk("privileged guest dom%d requests pfn=%lx to map mfn=%lx for dom%d\n",
+ d->id, gpfn, mfn, FOREIGNDOM->id);
+ set_machinetophys(mfn, gpfn);
+ set_p2m_entry(FOREIGNDOM, gpfn, mfn, NULL, NULL);
okay = 1;
+ shadow_unlock(FOREIGNDOM);
break;
}
-
+
if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
{
MEM_LOG("Could not get page for mach->phys update");
break;
}
- set_machinetophys(mfn, req.val);
+ set_machinetophys(mfn, gpfn);
okay = 1;
/*
}
out:
- if ( prev_mfn != 0 )
- unmap_domain_mem((void *)va);
+ unmap_domain_mem_cache(&mapcache);
+ unmap_domain_mem_cache(&sh_mapcache);
process_deferred_ops(cpu);
/* This function assumes the caller is holding the domain's BIGLOCK
* and is running in a shadow mode
*/
-int update_shadow_va_mapping(unsigned long va,
- l1_pgentry_t val,
- struct exec_domain *ed,
- struct domain *d)
-{
- unsigned long l1mfn;
- l1_pgentry_t spte;
- int rc = 0;
-
- check_pagetable(ed, "pre-va"); /* debug */
- shadow_lock(d);
-
- // This is actually overkill - we don't need to sync the L1 itself,
- // just everything involved in getting to this L1 (i.e. we need
- // linear_pg_table[l1_linear_offset(va)] to be in sync)...
- //
- __shadow_sync_va(ed, va);
-
-#if 1 /* keep check_pagetables() happy */
- /*
- * However, the above doesn't guarantee that there's no snapshot of
- * the L1 table in question; it just says that the relevant L2 and L1
- * entries for VA are in-sync. There might still be a snapshot.
- *
- * The checking code in _check_pagetables() assumes that no one will
- * mutate the shadow of a page that has a snapshot. It's actually
- * OK to not sync this page, but it seems simpler to:
- * 1) keep all code paths the same, and
- * 2) maintain the invariant for _check_pagetables(), rather than try
- * to teach it about this boundary case.
- * So we flush this L1 page, if it's out of sync.
- */
- l1mfn = l2e_get_pfn(linear_l2_table(ed)[l2_table_offset(va)]);
- if ( mfn_out_of_sync(l1mfn) )
- {
- perfc_incrc(extra_va_update_sync);
- __shadow_sync_mfn(d, l1mfn);
- }
-#endif /* keep check_pagetables() happy */
-
- if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
- &val, sizeof(val))))
- {
- rc = -EINVAL;
- goto out;
- }
-
- // also need to update the shadow
-
- l1pte_propagate_from_guest(d, val, &spte);
- shadow_set_l1e(va, spte, 0);
-
- /*
- * If we're in log-dirty mode then we need to note that we've updated
- * the PTE in the PT-holding page. We need the machine frame number
- * for this.
- */
- if ( shadow_mode_log_dirty(d) )
- mark_dirty(d, va_to_l1mfn(ed, va));
-
- out:
- shadow_unlock(d);
- check_pagetable(ed, "post-va"); /* debug */
-
- return rc;
-}
-
int update_grant_va_mapping(unsigned long va,
l1_pgentry_t _nl1e,
struct domain *d,
cleanup_writable_pagetable(d);
+ // This is actually overkill - we don't need to sync the L1 itself,
+ // just everything involved in getting to this L1 (i.e. we need
+ // linear_pg_table[l1_linear_offset(va)] to be in sync)...
+ //
+ __shadow_sync_va(ed, va);
+
pl1e = &linear_pg_table[l1_linear_offset(va)];
if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
rc = -EINVAL;
- else
+ else if ( !shadow_mode_refcounts(d) )
{
if ( update_l1e(pl1e, ol1e, _nl1e) )
{
else
rc = -EINVAL;
}
+ else
+ {
+ printk("grant tables and shadow mode currently don't work together\n");
+ BUG();
+ }
if ( unlikely(shadow_mode_enabled(d)) )
- update_shadow_va_mapping(va, _nl1e, ed, d);
+ shadow_do_update_va_mapping(va, _nl1e, ed);
return rc;
}
cleanup_writable_pagetable(d);
if ( unlikely(shadow_mode_enabled(d)) )
+ check_pagetable(ed, "pre-va"); /* debug */
+
+ if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
+ val)) )
+ rc = -EINVAL;
+
+ if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
{
if ( unlikely(percpu_info[cpu].foreign &&
(shadow_mode_translate(d) ||
domain_crash();
}
- rc = update_shadow_va_mapping(va, val, ed, d);
+ rc = shadow_do_update_va_mapping(va, val, ed);
+
+ check_pagetable(ed, "post-va"); /* debug */
}
- else if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
- val)) )
- rc = -EINVAL;
switch ( flags & UVMF_FLUSHTYPE_MASK )
{
#define PTWR_PRINTK(_f, _a...) ((void)0)
#endif
+/* Re-validate a given p.t. page, given its prior snapshot.
+ *
+ * Compares every entry of l1page against the corresponding snapshot entry
+ * and fixes up the page type/ref counts for each entry the guest changed
+ * while the page was writable.  Returns the number of modified entries.
+ * If an entry fails re-validation, the remaining entries are restored from
+ * the snapshot (keeping the ref counts consistent) and the domain is
+ * crashed.
+ */
+int revalidate_l1(struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
+{
+ l1_pgentry_t ol1e, nl1e;
+ int modified = 0, i;
+
+#if 0
+ if ( d->id )
+ printk("%s: l1page mfn=%lx snapshot mfn=%lx\n", __func__,
+ l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)l1page)]),
+ l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)snapshot)]));
+#endif
+
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
+ ol1e = snapshot[i];
+ nl1e = l1page[i];
+
+ // Unchanged entries need no ref-count adjustment.
+ if ( likely(l1e_get_value(ol1e) == l1e_get_value(nl1e)) )
+ continue;
+
+ /* Update number of entries modified. */
+ modified++;
+
+ /*
+ * Fast path for PTEs that have merely been write-protected
+ * (e.g., during a Unix fork()). A strict reduction in privilege.
+ */
+ if ( likely(l1e_get_value(ol1e) == (l1e_get_value(nl1e)|_PAGE_RW)) )
+ {
+ if ( likely(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
+ put_page_type(&frame_table[l1e_get_pfn(nl1e)]);
+ continue;
+ }
+
+ // General case: take refs for the new entry, then drop the old one's.
+ if ( unlikely(!get_page_from_l1e(nl1e, d)) )
+ {
+ MEM_LOG("ptwr: Could not re-validate l1 page\n");
+ /*
+ * Make the remaining p.t's consistent before crashing, so the
+ * reference counts are correct.
+ */
+ memcpy(&l1page[i], &snapshot[i],
+ (L1_PAGETABLE_ENTRIES - i) * sizeof(l1_pgentry_t));
+ domain_crash();
+ break;
+ }
+
+ // Drop the ref that was held for the old (snapshot) entry.
+ put_page_from_l1e(ol1e, d);
+ }
+
+ return modified;
+}
+
+
/* Flush the given writable p.t. page and write-protect it again. */
void ptwr_flush(struct domain *d, const int which)
{
unsigned long pte, *ptep, l1va;
- l1_pgentry_t *pl1e, ol1e, nl1e;
+ l1_pgentry_t *pl1e;
l2_pgentry_t *pl2e;
- int i;
- unsigned int modified = 0;
+ unsigned int modified;
ASSERT(!shadow_mode_enabled(d));
*/
pl1e = d->arch.ptwr[which].pl1e;
- for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
- {
- ol1e = d->arch.ptwr[which].page[i];
- nl1e = pl1e[i];
-
- if ( likely(l1e_get_value(ol1e) == l1e_get_value(nl1e)) )
- continue;
-
- /* Update number of entries modified. */
- modified++;
-
- /*
- * Fast path for PTEs that have merely been write-protected
- * (e.g., during a Unix fork()). A strict reduction in privilege.
- */
- if ( likely(l1e_get_value(ol1e) == (l1e_get_value(nl1e)|_PAGE_RW)) )
- {
- if ( likely(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
- put_page_type(&frame_table[l1e_get_pfn(nl1e)]);
- continue;
- }
-
- if ( unlikely(!get_page_from_l1e(nl1e, d)) )
- {
- MEM_LOG("ptwr: Could not re-validate l1 page\n");
- /*
- * Make the remaining p.t's consistent before crashing, so the
- * reference counts are correct.
- */
- memcpy(&pl1e[i], &d->arch.ptwr[which].page[i],
- (L1_PAGETABLE_ENTRIES - i) * sizeof(l1_pgentry_t));
- domain_crash();
- break;
- }
-
- put_page_from_l1e(ol1e, d);
- }
+ modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
unmap_domain_mem(pl1e);
-
perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
d->arch.ptwr[which].prev_nr_updates = modified;
#include <xen/sched.h>
#include <xen/trace.h>
+#define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
+
static void shadow_free_snapshot(struct domain *d,
struct out_of_sync_entry *entry);
static void remove_out_of_sync_entries(struct domain *d, unsigned long smfn);
static void free_writable_pte_predictions(struct domain *d);
+#if SHADOW_DEBUG
+static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
+#endif
+
/********
There's a per-domain shadow table spin lock which works fine for SMP
__shadow_sync_mfn(d, gmfn);
}
+ if ( !shadow_mode_refcounts(d) )
+ return 1;
+
if ( unlikely(page_is_page_table(page)) )
return 1;
// TLB flushes required when promoting a writable page, and also deal
// with any outstanding (external) writable refs to this page (by
// refusing to promote it). The pinning headache complicates this
- // code -- it would all much get simpler if we stop using
+ // code -- it would all get much simpler if we stop using
// shadow_lock() and move the shadow code to BIGLOCK().
//
if ( unlikely(!get_page(page, d)) )
static inline void
shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
{
+ if ( !shadow_mode_refcounts(d) )
+ return;
+
ASSERT(frame_table[gmfn].count_info & PGC_page_table);
if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
else
{
page = alloc_domheap_page(NULL);
- void *l1 = map_domain_mem(page_to_pfn(page) << PAGE_SHIFT);
+ void *l1 = map_domain_mem(page_to_phys(page));
memset(l1, 0, PAGE_SIZE);
unmap_domain_mem(l1);
}
for ( i = min; i <= max; i++ )
{
- put_page_from_l1e(pl1e[i], d);
+ shadow_put_page_from_l1e(pl1e[i], d);
pl1e[i] = l1e_empty();
}
static void inline
free_shadow_l2_table(struct domain *d, unsigned long smfn)
{
- unsigned long *pl2e = map_domain_mem(smfn << PAGE_SHIFT);
+ l2_pgentry_t *pl2e = map_domain_mem(smfn << PAGE_SHIFT);
int i, external = shadow_mode_external(d);
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
if ( external || is_guest_l2_slot(i) )
- if ( pl2e[i] & _PAGE_PRESENT )
- put_shadow_ref(pl2e[i] >> PAGE_SHIFT);
+ if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
+ put_shadow_ref(l2e_get_pfn(pl2e[i]));
if ( (PGT_base_page_table == PGT_l2_page_table) &&
shadow_mode_translate(d) && !external )
{
// free the ref to the hl2
//
- put_shadow_ref(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]
- >> PAGE_SHIFT);
+ put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
}
unmap_domain_mem(pl2e);
free_domheap_page(page);
}
+// Unpin any pinned shadows of guest page gpfn, at shadow type stype and
+// every lower shadow type.  Called (e.g. from free_page_type) when a guest
+// page stops being a page table, so stale pinned shadows get released.
+void
+remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
+{
+ unsigned long smfn;
+
+ //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
+
+ shadow_lock(d);
+
+ // Walk down the shadow types; assumes PGT_l?_shadow codes are evenly
+ // spaced PGT_l1_shadow apart -- TODO confirm against the PGT_* defs.
+ while ( stype >= PGT_l1_shadow )
+ {
+ smfn = __shadow_status(d, gpfn, stype);
+ if ( smfn && MFN_PINNED(smfn) )
+ shadow_unpin(smfn);
+ stype -= PGT_l1_shadow;
+ }
+
+ shadow_unlock(d);
+}
+
static void inline
release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
{
//
free_out_of_sync_state(d);
- // second, remove any outstanding refs from ed->arch.shadow_table...
+ // second, remove any outstanding refs from ed->arch.shadow_table
+ // and CR3.
//
for_each_exec_domain(d, ed)
{
if ( pagetable_val(ed->arch.shadow_table) )
{
- put_shadow_ref(pagetable_val(ed->arch.shadow_table) >> PAGE_SHIFT);
+ put_shadow_ref(pagetable_get_pfn(ed->arch.shadow_table));
ed->arch.shadow_table = mk_pagetable(0);
}
+
+ if ( ed->arch.monitor_shadow_ref )
+ {
+ put_shadow_ref(ed->arch.monitor_shadow_ref);
+ ed->arch.monitor_shadow_ref = 0;
+ }
}
// For external shadows, remove the monitor table's refs
// under us... First, collect the list of pinned pages, then
// free them.
//
-#define PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
for ( i = 0; i < shadow_ht_buckets; i++ )
{
u32 count;
count = 0;
for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
- if ( PINNED(x->smfn) )
+ if ( MFN_PINNED(x->smfn) )
count++;
if ( !count )
continue;
mfn_list = xmalloc_array(unsigned long, count);
count = 0;
for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
- if ( PINNED(x->smfn) )
+ if ( MFN_PINNED(x->smfn) )
mfn_list[count++] = x->smfn;
while ( count )
}
xfree(mfn_list);
}
-#undef PINNED
+
+ // Now free the pre-zero'ed pages from the domain
+ //
+ struct list_head *list_ent, *tmp;
+ list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
+ {
+ list_del(list_ent);
+ perfc_decr(free_l1_pages);
+
+ struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
+ free_domheap_page(page);
+ }
shadow_audit(d, 0);
{
}
-int _shadow_mode_enabled(struct domain *d)
+int _shadow_mode_refcounts(struct domain *d)
{
- return shadow_mode_enabled(d);
+ return shadow_mode_refcounts(d);
}
static void alloc_monitor_pagetable(struct exec_domain *ed)
/*
* Then free monitor_table.
*/
- mfn = (pagetable_val(ed->arch.monitor_table)) >> PAGE_SHIFT;
+ mfn = pagetable_get_pfn(ed->arch.monitor_table);
free_domheap_page(&frame_table[mfn]);
ed->arch.monitor_table = mk_pagetable(0);
}
int
-set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn)
+set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
+ struct map_dom_mem_cache *l2cache,
+ struct map_dom_mem_cache *l1cache)
{
unsigned long phystab = pagetable_val(d->arch.phys_table);
l2_pgentry_t *l2, l2e;
ASSERT( phystab );
- l2 = map_domain_mem(phystab);
+ l2 = map_domain_mem_with_cache(phystab, l2cache);
l2e = l2[l2_table_offset(va)];
- if ( !l2e_get_value(l2e) ) /* FIXME: check present bit? */
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
{
l1page = alloc_domheap_page(NULL);
if ( !l1page )
+ {
+ unmap_domain_mem_with_cache(l2, l2cache);
return 0;
+ }
- l1 = map_domain_mem(page_to_pfn(l1page) << PAGE_SHIFT);
+ l1 = map_domain_mem_with_cache(page_to_phys(l1page), l1cache);
memset(l1, 0, PAGE_SIZE);
- unmap_domain_mem(l1);
+ unmap_domain_mem_with_cache(l1, l1cache);
l2e = l2e_create_pfn(page_to_pfn(l1page), __PAGE_HYPERVISOR);
l2[l2_table_offset(va)] = l2e;
}
- unmap_domain_mem(l2);
+ unmap_domain_mem_with_cache(l2, l2cache);
- l1 = map_domain_mem(l2e_get_phys(l2e));
+ l1 = map_domain_mem_with_cache(l2e_get_phys(l2e), l1cache);
l1[l1_table_offset(va)] = l1e_create_pfn(mfn, __PAGE_HYPERVISOR);
- unmap_domain_mem(l1);
+ unmap_domain_mem_with_cache(l1, l1cache);
return 1;
}
struct pfn_info *page, *l2page;
l2_pgentry_t *l2;
unsigned long mfn, pfn;
+ struct map_dom_mem_cache l2cache = MAP_DOM_MEM_CACHE_INIT;
+ struct map_dom_mem_cache l1cache = MAP_DOM_MEM_CACHE_INIT;
l2page = alloc_domheap_page(NULL);
if ( !l2page )
return 0;
- d->arch.phys_table = mk_pagetable(page_to_pfn(l2page) << PAGE_SHIFT);
- l2 = map_domain_mem(page_to_pfn(l2page) << PAGE_SHIFT);
+ d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
+ l2 = map_domain_mem_with_cache(page_to_phys(l2page), &l2cache);
memset(l2, 0, PAGE_SIZE);
- unmap_domain_mem(l2);
+ unmap_domain_mem_with_cache(l2, &l2cache);
list_ent = d->page_list.next;
while ( list_ent != &d->page_list )
ASSERT(pfn != INVALID_M2P_ENTRY);
ASSERT(pfn < (1u<<20));
- set_p2m_entry(d, pfn, mfn);
+ set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
list_ent = page->list.next;
}
if ( (pfn != INVALID_M2P_ENTRY) &&
(pfn < (1u<<20)) )
{
- set_p2m_entry(d, pfn, mfn);
+ set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
}
list_ent = page->list.next;
}
+ unmap_domain_mem_cache(&l2cache);
+ unmap_domain_mem_cache(&l1cache);
+
return 1;
}
{
// external guests provide their own memory for their P2M maps.
//
- ASSERT( d == page_get_owner(&frame_table[pagetable_val(
- d->arch.phys_table)>>PAGE_SHIFT]) );
+ ASSERT( d == page_get_owner(
+ &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
}
}
printk("audit1\n");
- _audit_domain(d, AUDIT_ALREADY_LOCKED | AUDIT_ERRORS_OK);
+ _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
printk("audit1 done\n");
// Get rid of any shadow pages from any previous shadow mode.
free_shadow_pages(d);
printk("audit2\n");
- _audit_domain(d, AUDIT_ALREADY_LOCKED | AUDIT_ERRORS_OK);
+ _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
printk("audit2 done\n");
- // Turn off writable page tables.
- // It doesn't mix with shadow mode.
- // And shadow mode offers a superset of functionality.
- //
- vm_assist(d, VMASST_CMD_disable, VMASST_TYPE_writable_pagetables);
-
/*
* Tear down it's counts by disassembling its page-table-based ref counts.
* Also remove CR3's gcount/tcount.
* Assert that no pages are left with L1/L2/L3/L4 type.
*/
audit_adjust_pgtables(d, -1, 1);
+
d->arch.shadow_mode = mode;
- struct list_head *list_ent = d->page_list.next;
- while ( list_ent != &d->page_list )
+ if ( shadow_mode_refcounts(d) )
{
- struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
- if ( !get_page_type(page, PGT_writable_page) )
- BUG();
- put_page_type(page);
+ struct list_head *list_ent = d->page_list.next;
+ while ( list_ent != &d->page_list )
+ {
+ struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
+ if ( !get_page_type(page, PGT_writable_page) )
+ BUG();
+ put_page_type(page);
- list_ent = page->list.next;
+ list_ent = page->list.next;
+ }
}
audit_adjust_pgtables(d, 1, 1);
printk("audit3\n");
- _audit_domain(d, AUDIT_ALREADY_LOCKED);
+ _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
printk("audit3 done\n");
return 0;
* Currently this does not fix up page ref counts, so it is valid to call
* only when a domain is being destroyed.
*/
- BUG_ON(!test_bit(DF_DYING, &d->d_flags));
- d->arch.shadow_tainted_refcnts = 1;
+ BUG_ON(!test_bit(DF_DYING, &d->d_flags) && shadow_mode_refcounts(d));
+ d->arch.shadow_tainted_refcnts = shadow_mode_refcounts(d);
free_shadow_pages(d);
free_writable_pte_predictions(d);
}
}
#endif
-
+
d->arch.shadow_mode = 0;
free_shadow_ht_entries(d);
free_out_of_sync_entries(d);
+
+ struct exec_domain *ed;
+ for_each_exec_domain(d, ed)
+ {
+ update_pagetables(ed);
+ }
}
static int shadow_mode_table_op(
switch ( op )
{
case DOM0_SHADOW_CONTROL_OP_OFF:
+ __shadow_sync_all(d);
__shadow_mode_disable(d);
break;
case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
free_shadow_pages(d);
rc = __shadow_mode_enable(
- d, d->arch.shadow_mode|SHM_enable|SHM_translate);
+ d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
break;
default:
if ( init_table )
{
+ l1_pgentry_t sl1e;
+ int index = l1_table_offset(va);
+ int min = 1, max = 0;
+
gpl1e = &(linear_pg_table[l1_linear_offset(va) &
~(L1_PAGETABLE_ENTRIES-1)]);
spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
~(L1_PAGETABLE_ENTRIES-1)]);
- l1_pgentry_t sl1e;
- int index = l1_table_offset(va);
- int min = 1, max = 0;
-
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
{
l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(sl1e, d) )
+ unlikely(!shadow_get_page_from_l1e(sl1e, d)) )
sl1e = l1e_empty();
- if ( l1e_get_value(sl1e) == 0 ) /* FIXME: check flags? */
+ if ( l1e_get_flags(sl1e) == 0 )
{
// First copy entries from 0 until first invalid.
// Then copy entries from index until first invalid.
if ( !get_shadow_ref(smfn) )
BUG();
- if ( shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow )
+ if ( shadow_mode_refcounts(d) &&
+ (shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow) )
min_max = pfn_to_page(sl1mfn)->tlbflush_timestamp;
pfn_to_page(smfn)->tlbflush_timestamp = min_max;
ASSERT(spin_is_locked(&d->arch.shadow_lock));
ASSERT(pfn_valid(mfn));
- ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page);
+
+#ifndef NDEBUG
+ u32 type = page->u.inuse.type_info & PGT_type_mask;
+ if ( shadow_mode_refcounts(d) )
+ {
+ ASSERT(type == PGT_writable_page);
+ }
+ else
+ {
+ ASSERT(type && (type < PGT_l4_page_table));
+ }
+#endif
FSH_LOG("%s(gpfn=%lx, mfn=%lx) c=%08x t=%08x", __func__,
gpfn, mfn, page->count_info, page->u.inuse.type_info);
entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
entry->writable_pl1e = -1;
+#if SHADOW_DEBUG
+ mark_shadows_as_reflecting_snapshot(d, gpfn);
+#endif
+
// increment guest's ref count to represent the entry in the
// full shadow out-of-sync list.
//
int __shadow_out_of_sync(struct exec_domain *ed, unsigned long va)
{
struct domain *d = ed->domain;
- unsigned long l2mfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
+ unsigned long l2mfn = pagetable_get_pfn(ed->arch.guest_table);
l2_pgentry_t l2e;
unsigned long l1mfn;
perfc_incrc(shadow_out_of_sync_calls);
+ // PERF BUG: snapshot_entry_matches will call map_domain_mem() on the l2
+ // page, but it's already available at ed->arch.guest_vtable...
+ // Ditto for the sl2 page and ed->arch.shadow_vtable.
+ //
if ( page_out_of_sync(&frame_table[l2mfn]) &&
!snapshot_entry_matches(ed, l2mfn, l2_table_offset(va)) )
return 1;
if ( !VALID_MFN(l1mfn) )
return 0;
+ // PERF BUG: snapshot_entry_matches will call map_domain_mem() on the l1
+ // page, but it's already available at linear_pg_table[l1_linear_offset()].
+ // Ditto for the sl1 page and shadow_linear_pg_table[]...
+ //
if ( page_out_of_sync(&frame_table[l1mfn]) &&
!snapshot_entry_matches(ed, l1mfn, l1_table_offset(va)) )
return 1;
found++;
pt[i] = new;
if ( is_l1_shadow )
- put_page_from_l1e(old, d);
+ shadow_put_page_from_l1e(old, d);
#if 0
printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
//
write_refs =
(frame_table[readonly_gmfn].u.inuse.type_info & PGT_count_mask);
- if ( write_refs &&
- (frame_table[readonly_gmfn].u.inuse.type_info & PGT_pinned) )
+ if ( write_refs && MFN_PINNED(readonly_gmfn) )
{
write_refs--;
}
count++;
if ( is_l1_shadow )
- put_page_from_l1e(ol2e, d);
+ shadow_put_page_from_l1e(ol2e, d);
else /* must be an hl2 page */
put_page(&frame_table[forbidden_gmfn]);
}
if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
continue;
- if ( !(smfn = __shadow_status(d, entry->gpfn, stype)) )
- continue;
+ smfn = __shadow_status(d, entry->gpfn, stype);
+
+ if ( !smfn )
+ {
+ if ( shadow_mode_refcounts(d) )
+ continue;
+
+ // For light weight shadows, even when no shadow page exists,
+ // we need to resync the refcounts to the new contents of the
+ // guest page.
+ // This only applies when we have writable page tables.
+ //
+ if ( (stype == PGT_l1_shadow) && !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
+ continue;
+ if ( (stype != PGT_l1_shadow) && !shadow_mode_write_all(d) )
+ continue;
+ }
FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
//
guest = map_domain_mem(entry->gmfn << PAGE_SHIFT);
snapshot = map_domain_mem(entry->snapshot_mfn << PAGE_SHIFT);
- shadow = map_domain_mem(smfn << PAGE_SHIFT);
+
+ if ( smfn )
+ shadow = map_domain_mem(smfn << PAGE_SHIFT);
+ else
+ shadow = NULL;
+
unshadow = 0;
switch ( stype ) {
case PGT_l1_shadow:
{
+ l1_pgentry_t *guest1 = guest;
+ l1_pgentry_t *shadow1 = shadow;
+ l1_pgentry_t *snapshot1 = snapshot;
+
+ ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables));
+
+ if ( !shadow_mode_refcounts(d) )
+ revalidate_l1(d, guest1, snapshot1);
+
+ if ( !smfn )
+ break;
+
u32 min_max_shadow = pfn_to_page(smfn)->tlbflush_timestamp;
int min_shadow = SHADOW_MIN(min_max_shadow);
int max_shadow = SHADOW_MAX(min_max_shadow);
int min_snapshot = SHADOW_MIN(min_max_snapshot);
int max_snapshot = SHADOW_MAX(min_max_snapshot);
- l1_pgentry_t *guest1 = guest;
- l1_pgentry_t *shadow1 = shadow;
- l1_pgentry_t *snapshot1 = snapshot;
-
changed = 0;
for ( i = min_shadow; i <= max_shadow; i++ )
l2_pgentry_t *shadow2 = shadow;
l2_pgentry_t *snapshot2 = snapshot;
+ ASSERT(shadow_mode_write_all(d));
+ BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
+
changed = 0;
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
// Need a better solution long term.
if ( !(l2e_get_flags(new_pde) & _PAGE_PRESENT) &&
unlikely(l2e_get_value(new_pde) != 0) &&
- !unshadow &&
- (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
+ !unshadow && MFN_PINNED(smfn) )
unshadow = 1;
}
if ( max == -1 )
l2_pgentry_t *snapshot2 = snapshot;
l1_pgentry_t *shadow2 = shadow;
+ ASSERT(shadow_mode_write_all(d));
+ BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
+
changed = 0;
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
BUG();
}
- unmap_domain_mem(shadow);
+ if ( smfn )
+ unmap_domain_mem(shadow);
unmap_domain_mem(snapshot);
unmap_domain_mem(guest);
unsigned long hl2mfn;
if ( (hl2mfn = __shadow_status(d, entry->gpfn, PGT_hl2_shadow)) &&
- (frame_table[hl2mfn].u.inuse.type_info & PGT_pinned) )
+ MFN_PINNED(hl2mfn) )
shadow_unpin(hl2mfn);
}
}
!shadow_get_page_from_l1e(npte, d) )
BUG();
*ppte = npte;
- put_page_from_l1e(opte, d);
+ shadow_put_page_from_l1e(opte, d);
unmap_domain_mem(ppte);
}
/* Write fault? */
if ( regs->error_code & 2 )
{
+ int allow_writes = 0;
+
if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
{
- /* Write fault on a read-only mapping. */
- SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)",
- l1e_get_value(gpte));
- perfc_incrc(shadow_fault_bail_ro_mapping);
- goto fail;
+ if ( shadow_mode_page_writable(d, l1e_get_pfn(gpte)) )
+ {
+ allow_writes = 1;
+ l1e_add_flags(&gpte, _PAGE_RW);
+ }
+ else
+ {
+ /* Write fault on a read-only mapping. */
+ SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)",
+ l1e_get_value(gpte));
+ perfc_incrc(shadow_fault_bail_ro_mapping);
+ goto fail;
+ }
}
if ( !l1pte_write_fault(ed, &gpte, &spte, va) )
shadow_unlock(d);
return 0;
}
+
+ if ( allow_writes )
+ l1e_remove_flags(&gpte, _PAGE_RW);
}
else
{
/*
* STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
*/
-
- /* XXX Watch out for read-only L2 entries! (not used in Linux). */
- if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
- &gpte, sizeof(gpte))) )
+ if ( l1e_has_changed(&orig_gpte, &gpte, PAGE_FLAG_MASK) )
{
- printk("shadow_fault() failed, crashing domain %d "
- "due to a read-only L2 page table (gpde=%lx), va=%lx\n",
- d->id, l2e_get_value(gpde), va);
- domain_crash_synchronous();
- }
+ /* XXX Watch out for read-only L2 entries! (not used in Linux). */
+ if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
+ &gpte, sizeof(gpte))) )
+ {
+ printk("%s() failed, crashing domain %d "
+ "due to a read-only L2 page table (gpde=%lx), va=%lx\n",
+ __func__, d->id, l2e_get_value(gpde), va);
+ domain_crash_synchronous();
+ }
- // if necessary, record the page table page as dirty
- if ( unlikely(shadow_mode_log_dirty(d)) &&
- l1e_has_changed(&orig_gpte, &gpte, PAGE_FLAG_MASK))
- mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
+ // if necessary, record the page table page as dirty
+ if ( unlikely(shadow_mode_log_dirty(d)) )
+ __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
+ }
shadow_set_l1e(va, spte, 1);
return 0;
}
+/*
+ * Propagate a guest L1 pte write into the corresponding shadow L1, if one
+ * exists.  pa is the machine address of the guest pte being updated, gpte
+ * its new value; cache amortizes map_domain_mem() calls across a batch of
+ * updates.  No-op (beyond taking the shadow lock) when the guest L1 page
+ * currently has no shadow.
+ */
+void shadow_l1_normal_pt_update(
+ struct domain *d,
+ unsigned long pa, l1_pgentry_t gpte,
+ struct map_dom_mem_cache *cache)
+{
+ unsigned long sl1mfn;
+ l1_pgentry_t *spl1e, spte;
+
+ shadow_lock(d);
+
+ /* NOTE(review): locks d but looks up/propagates via current->domain --
+ * confirm callers guarantee current->domain == d here. */
+ sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
+ if ( sl1mfn )
+ {
+ SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%08lx",
+ (void *)pa, l1e_get_value(gpte));
+ l1pte_propagate_from_guest(current->domain, gpte, &spte);
+
+ /* Index the shadow L1 by the pte's offset within the guest page. */
+ spl1e = map_domain_mem_with_cache(sl1mfn << PAGE_SHIFT, cache);
+ spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
+ unmap_domain_mem_with_cache(spl1e, cache);
+ }
+
+ shadow_unlock(d);
+}
+
+/*
+ * Propagate a guest L2 pde write into the corresponding shadow L2, if one
+ * exists.  Unlike the L1 case this goes through validate_pde_change(),
+ * which updates the shadow entry in place.  cache amortizes
+ * map_domain_mem() calls across a batch of updates.
+ */
+void shadow_l2_normal_pt_update(
+ struct domain *d,
+ unsigned long pa, l2_pgentry_t gpde,
+ struct map_dom_mem_cache *cache)
+{
+ unsigned long sl2mfn;
+ l2_pgentry_t *spl2e;
+
+ shadow_lock(d);
+
+ /* NOTE(review): locks d but queries via current->domain -- confirm
+ * callers guarantee current->domain == d here. */
+ sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
+ if ( sl2mfn )
+ {
+ SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%08lx",
+ (void *)pa, l2e_get_value(gpde));
+ spl2e = map_domain_mem_with_cache(sl2mfn << PAGE_SHIFT, cache);
+ validate_pde_change(d, gpde,
+ &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
+ unmap_domain_mem_with_cache(spl2e, cache);
+ }
+
+ shadow_unlock(d);
+}
+
+#ifdef __x86_64__
+/* 4-level (x86_64) shadow propagation is not implemented yet; these stubs
+ * keep the pt-update interface uniform with the l1/l2 versions and trap
+ * any attempt to use it. */
+void shadow_l3_normal_pt_update(
+ struct domain *d,
+ unsigned long pa, l3_pgentry_t gpde,
+ struct map_dom_mem_cache *cache)
+{
+ BUG(); // not yet implemented
+}
+
+void shadow_l4_normal_pt_update(
+ struct domain *d,
+ unsigned long pa, l4_pgentry_t gpde,
+ struct map_dom_mem_cache *cache)
+{
+ BUG(); // not yet implemented
+}
+#endif
+
+/*
+ * Shadow-mode handler for update_va_mapping: install guest pte val at va,
+ * propagating it into the shadow tables under the shadow lock.  Marks the
+ * pt-holding frame dirty in log-dirty mode.  Returns rc, which is
+ * currently always 0.
+ */
+int shadow_do_update_va_mapping(unsigned long va,
+ l1_pgentry_t val,
+ struct exec_domain *ed)
+{
+ struct domain *d = ed->domain;
+ l1_pgentry_t spte;
+ int rc = 0;
+
+ shadow_lock(d);
+
+ //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_value(val));
+
+ // This is actually overkill - we don't need to sync the L1 itself,
+ // just everything involved in getting to this L1 (i.e. we need
+ // linear_pg_table[l1_linear_offset(va)] to be in sync)...
+ //
+ __shadow_sync_va(ed, va);
+
+ l1pte_propagate_from_guest(d, val, &spte);
+ shadow_set_l1e(va, spte, 0);
+
+ /*
+ * If we're in log-dirty mode then we need to note that we've updated
+ * the PTE in the PT-holding page. We need the machine frame number
+ * for this.
+ */
+ if ( shadow_mode_log_dirty(d) )
+ __mark_dirty(d, va_to_l1mfn(ed, va));
+
+// out:
+ shadow_unlock(d);
+
+ return rc;
+}
+
+
/*
* What lives where in the 32-bit address space in the various shadow modes,
* and what it uses to get/maintain that mapping.
void __update_pagetables(struct exec_domain *ed)
{
struct domain *d = ed->domain;
- unsigned long gmfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
+ unsigned long gmfn = pagetable_get_pfn(ed->arch.guest_table);
unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
unsigned long smfn, hl2mfn, old_smfn;
smfn = shadow_l2_table(d, gpfn, gmfn);
if ( !get_shadow_ref(smfn) )
BUG();
- old_smfn = pagetable_val(ed->arch.shadow_table) >> PAGE_SHIFT;
+ old_smfn = pagetable_get_pfn(ed->arch.shadow_table);
ed->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
if ( old_smfn )
put_shadow_ref(old_smfn);
#if SHADOW_DEBUG
+// The following is entirely for _check_pagetable()'s benefit.
+// _check_pagetable() wants to know whether a given entry in a
+// shadow page table is supposed to be the shadow of the guest's
+// current entry, or the shadow of the entry held in the snapshot
+// taken above.
+//
+// Here, we mark all currently existing entries as reflecting
+// the snapshot, above. All other places in xen that update
+// the shadow will keep the shadow in sync with the guest's
+// entries (via l1pte_propagate_from_guest and friends), which clear
+// the SHADOW_REFLECTS_SNAPSHOT bit.
+//
+// Set SHADOW_REFLECTS_SNAPSHOT on every present guest-visible entry of
+// gpfn's L1 and L2 shadows (whichever exist).  Debug-only bookkeeping for
+// _check_pagetable(), as described in the comment above.
+static void
+mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn)
+{
+ unsigned long smfn;
+ l1_pgentry_t *l1e;
+ l2_pgentry_t *l2e;
+ unsigned i;
+
+ if ( (smfn = __shadow_status(d, gpfn, PGT_l1_shadow)) )
+ {
+ l1e = map_domain_mem(smfn << PAGE_SHIFT);
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ if ( is_guest_l1_slot(i) &&
+ (l1e_get_flags(l1e[i]) & _PAGE_PRESENT) )
+ l1e_add_flags(&l1e[i], SHADOW_REFLECTS_SNAPSHOT);
+ unmap_domain_mem(l1e);
+ }
+
+ if ( (smfn = __shadow_status(d, gpfn, PGT_l2_shadow)) )
+ {
+ l2e = map_domain_mem(smfn << PAGE_SHIFT);
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+ if ( is_guest_l2_slot(i) &&
+ (l2e_get_flags(l2e[i]) & _PAGE_PRESENT) )
+ l2e_add_flags(&l2e[i], SHADOW_REFLECTS_SNAPSHOT);
+ unmap_domain_mem(l2e);
+ }
+}
+
// BUG: these are not SMP safe...
static int sh_l2_present;
static int sh_l1_present;
#define FAIL(_f, _a...) \
do { \
- printk("XXX %s-FAIL (%d,%d,%d)" _f " at %s(%d)\n", \
+ printk("XXX %s-FAIL (%d,%d,%d) " _f " at %s(%d)\n", \
sh_check_name, level, l2_idx, l1_idx, ## _a, \
__FILE__, __LINE__); \
- printk("g=%lx s=%lx &g=%p &s=%p" \
- " v2m(&g)=%08lx v2m(&s)=%08lx ea=%08x\n", \
- l1e_get_value(gpte), l1e_get_value(spte), pgpte, pspte, \
- v2m(ed, pgpte), v2m(ed, pspte), \
+ printk("guest_pte=%lx eff_guest_pte=%lx shadow_pte=%lx " \
+ "snapshot_pte=%lx &guest=%p &shadow=%p &snap=%p " \
+ "v2m(&guest)=%p v2m(&shadow)=%p v2m(&snap)=%p ea=%08x\n", \
+ l1e_get_value(guest_pte), l1e_get_value(eff_guest_pte), \
+ l1e_get_value(shadow_pte), l1e_get_value(snapshot_pte), \
+ p_guest_pte, p_shadow_pte, p_snapshot_pte, \
+ (void *)v2m(ed, p_guest_pte), (void *)v2m(ed, p_shadow_pte), \
+ (void *)v2m(ed, p_snapshot_pte), \
(l2_idx << L2_PAGETABLE_SHIFT) | \
(l1_idx << L1_PAGETABLE_SHIFT)); \
errors++; \
} while ( 0 )
static int check_pte(
- struct exec_domain *ed, l1_pgentry_t *pgpte, l1_pgentry_t *pspte,
- int level, int l2_idx, int l1_idx, int oos_ptes)
+ struct exec_domain *ed,
+ l1_pgentry_t *p_guest_pte,
+ l1_pgentry_t *p_shadow_pte,
+ l1_pgentry_t *p_snapshot_pte,
+ int level, int l2_idx, int l1_idx)
{
struct domain *d = ed->domain;
- l1_pgentry_t gpte = *pgpte;
- l1_pgentry_t spte = *pspte;
- unsigned long mask, gpfn, smfn, gmfn;
- int errors = 0;
+ l1_pgentry_t guest_pte = *p_guest_pte;
+ l1_pgentry_t shadow_pte = *p_shadow_pte;
+ l1_pgentry_t snapshot_pte = p_snapshot_pte ? *p_snapshot_pte : l1e_empty();
+ l1_pgentry_t eff_guest_pte;
+ unsigned long mask, eff_guest_pfn, eff_guest_mfn, shadow_mfn;
+ int errors = 0, guest_writable;
int page_table_page;
- if ( (l1e_get_value(spte) == 0) ||
- (l1e_get_value(spte) == 0xdeadface) ||
- (l1e_get_value(spte) == 0x00000E00) )
+ if ( (l1e_get_value(shadow_pte) == 0) ||
+ (l1e_get_value(shadow_pte) == 0xdeadface) ||
+ (l1e_get_value(shadow_pte) == 0x00000E00) )
return errors; /* always safe */
- if ( !(l1e_get_flags(spte) & _PAGE_PRESENT) )
- FAIL("Non zero not present spte");
+ if ( !(l1e_get_flags(shadow_pte) & _PAGE_PRESENT) )
+ FAIL("Non zero not present shadow_pte");
if ( level == 2 ) sh_l2_present++;
if ( level == 1 ) sh_l1_present++;
- if ( !(l1e_get_flags(gpte) & _PAGE_PRESENT) )
+ if ( (l1e_get_flags(shadow_pte) & SHADOW_REFLECTS_SNAPSHOT) && p_snapshot_pte )
+ eff_guest_pte = snapshot_pte;
+ else
+ eff_guest_pte = guest_pte;
+
+ if ( !(l1e_get_flags(eff_guest_pte) & _PAGE_PRESENT) )
FAIL("Guest not present yet shadow is");
- mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|PAGE_MASK);
+ mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_AVAIL|PAGE_MASK);
- if ( (l1e_get_value(spte) & mask) != (l1e_get_value(gpte) & mask) )
+ if ( ((l1e_get_value(shadow_pte) & mask) != (l1e_get_value(eff_guest_pte) & mask)) )
FAIL("Corrupt?");
if ( (level == 1) &&
- (l1e_get_flags(spte) & _PAGE_DIRTY) &&
- !(l1e_get_flags(gpte) & _PAGE_DIRTY) && !oos_ptes )
+ (l1e_get_flags(shadow_pte) & _PAGE_DIRTY) &&
+ !(l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY) )
FAIL("Dirty coherence");
- if ( (l1e_get_flags(spte) & _PAGE_ACCESSED) &&
- !(l1e_get_flags(gpte) & _PAGE_ACCESSED) && !oos_ptes )
+ if ( (l1e_get_flags(shadow_pte) & _PAGE_ACCESSED) &&
+ !(l1e_get_flags(eff_guest_pte) & _PAGE_ACCESSED) )
FAIL("Accessed coherence");
- if ( l1e_get_flags(spte) & _PAGE_GLOBAL )
+ if ( l1e_get_flags(shadow_pte) & _PAGE_GLOBAL )
FAIL("global bit set in shadow");
- smfn = l1e_get_pfn(spte);
- gpfn = l1e_get_pfn(gpte);
- gmfn = __gpfn_to_mfn(d, gpfn);
+ eff_guest_pfn = l1e_get_pfn(eff_guest_pte);
+ eff_guest_mfn = __gpfn_to_mfn(d, eff_guest_pfn);
+ shadow_mfn = l1e_get_pfn(shadow_pte);
+
+ if ( !VALID_MFN(eff_guest_mfn) && !shadow_mode_refcounts(d) )
+ FAIL("%s: invalid eff_guest_pfn=%lx eff_guest_pte=%lx\n", __func__, eff_guest_pfn,
+ l1e_get_value(eff_guest_pte));
- if ( !VALID_MFN(gmfn) )
- FAIL("%s: invalid gpfn=%lx gpte=%lx\n", __func__, gpfn,
- l1e_get_value(gpte));
+ page_table_page = mfn_is_page_table(eff_guest_mfn);
- page_table_page = mfn_is_page_table(gmfn);
+ guest_writable =
+ (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
+ (VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
- if ( (l1e_get_flags(spte) & _PAGE_RW ) &&
- !(l1e_get_flags(gpte) & _PAGE_RW) && !oos_ptes )
+ if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
{
- printk("gpfn=%lx gmfn=%lx smfn=%lx t=0x%08x page_table_page=%d "
- "oos_ptes=%d\n",
- gpfn, gmfn, smfn,
- frame_table[gmfn].u.inuse.type_info,
- page_table_page, oos_ptes);
+ printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
+ eff_guest_pfn, eff_guest_mfn, shadow_mfn,
+ frame_table[eff_guest_mfn].u.inuse.type_info,
+ page_table_page);
FAIL("RW coherence");
}
if ( (level == 1) &&
- (l1e_get_flags(spte) & _PAGE_RW ) &&
- !((l1e_get_flags(gpte) & _PAGE_RW) &&
- (l1e_get_flags(gpte) & _PAGE_DIRTY)) &&
- !oos_ptes )
- {
- printk("gpfn=%lx gmfn=%lx smfn=%lx t=0x%08x page_table_page=%d "
- "oos_ptes=%d\n",
- gpfn, gmfn, smfn,
- frame_table[gmfn].u.inuse.type_info,
- page_table_page, oos_ptes);
+ (l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
+ !(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
+ {
+ printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
+ eff_guest_pfn, eff_guest_mfn, shadow_mfn,
+ frame_table[eff_guest_mfn].u.inuse.type_info,
+ page_table_page);
FAIL("RW2 coherence");
}
- if ( gmfn == smfn )
+ if ( eff_guest_mfn == shadow_mfn )
{
if ( level > 1 )
FAIL("Linear map ???"); /* XXX this will fail on BSD */
if ( level == 2 )
{
- if ( __shadow_status(d, gpfn, PGT_l1_shadow) != smfn )
- FAIL("smfn problem gpfn=%lx smfn=%lx", gpfn,
- __shadow_status(d, gpfn, PGT_l1_shadow));
+ if ( __shadow_status(d, eff_guest_pfn, PGT_l1_shadow) != shadow_mfn )
+ FAIL("shadow_mfn problem eff_guest_pfn=%lx shadow_mfn=%lx", eff_guest_pfn,
+ __shadow_status(d, eff_guest_pfn, PGT_l1_shadow));
}
else
BUG(); // XXX -- not handled yet.
{
struct domain *d = ed->domain;
int i;
- l1_pgentry_t *gpl1e, *spl1e;
- int errors = 0, oos_ptes = 0;
+ unsigned long snapshot_mfn;
+ l1_pgentry_t *p_guest, *p_shadow, *p_snapshot = NULL;
+ int errors = 0;
if ( page_out_of_sync(pfn_to_page(gmfn)) )
{
- gmfn = __shadow_status(d, gpfn, PGT_snapshot);
- oos_ptes = 1;
- ASSERT(gmfn);
+ snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
+ ASSERT(snapshot_mfn);
+ p_snapshot = map_domain_mem(snapshot_mfn << PAGE_SHIFT);
}
- gpl1e = map_domain_mem(gmfn << PAGE_SHIFT);
- spl1e = map_domain_mem(smfn << PAGE_SHIFT);
+ p_guest = map_domain_mem(gmfn << PAGE_SHIFT);
+ p_shadow = map_domain_mem(smfn << PAGE_SHIFT);
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
- errors += check_pte(ed, &gpl1e[i], &spl1e[i], 1, l2_idx, i, oos_ptes);
+ errors += check_pte(ed, p_guest+i, p_shadow+i,
+ p_snapshot ? p_snapshot+i : NULL,
+ 1, l2_idx, i);
- unmap_domain_mem(spl1e);
- unmap_domain_mem(gpl1e);
+ unmap_domain_mem(p_shadow);
+ unmap_domain_mem(p_guest);
+ if ( p_snapshot )
+ unmap_domain_mem(p_snapshot);
return errors;
}
errors += check_pte(ed,
(l1_pgentry_t*)(&gpl2e[i]), /* Hmm, dirty ... */
(l1_pgentry_t*)(&spl2e[i]),
- 2, i, 0, 0);
+ NULL,
+ 2, i, 0);
unmap_domain_mem(spl2e);
unmap_domain_mem(gpl2e);
perfc_incrc(page_faults);
- if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
+ if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
+ !shadow_mode_enabled(d)) )
{
LOCK_BIGLOCK(d);
if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
((regs->error_code & 3) == 3) && /* write-protection fault */
ptwr_do_page_fault(d, addr) )
{
- if ( unlikely(shadow_mode_enabled(d)) )
- (void)shadow_fault(addr, regs);
UNLOCK_BIGLOCK(d);
return EXCRET_fault_fixed;
}
d->arch.arch_vmx.cpu_cr3);
domain_crash_synchronous(); /* need to take a clean path */
}
- old_base_mfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
+ old_base_mfn = pagetable_get_pfn(d->arch.guest_table);
if (old_base_mfn)
put_page(pfn_to_page(old_base_mfn));
"Invalid CR3 value=%lx", value);
domain_crash_synchronous(); /* need to take a clean path */
}
- old_base_mfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
+ old_base_mfn = pagetable_get_pfn(d->arch.guest_table);
d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
if (old_base_mfn)
put_page(pfn_to_page(old_base_mfn));
extern int shadow_remove_all_write_access(
struct domain *d, unsigned long gpfn, unsigned long gmfn);
extern u32 shadow_remove_all_access( struct domain *d, unsigned long gmfn);
-extern int _shadow_mode_enabled(struct domain *d);
+extern int _shadow_mode_refcounts(struct domain *d);
static inline void put_page(struct pfn_info *page)
{
unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
unlikely(d != _domain) ) /* Wrong owner? */
{
- if ( !_shadow_mode_enabled(domain) )
+ if ( !_shadow_mode_refcounts(domain) )
DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%08x\n",
page_to_pfn(page), domain, unpickle_domptr(d),
x, page->u.inuse.type_info);
void ptwr_destroy(struct domain *);
void ptwr_flush(struct domain *, const int);
int ptwr_do_page_fault(struct domain *, unsigned long);
+int revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);
#define cleanup_writable_pagetable(_d) \
do { \
- if ( unlikely(VM_ASSIST((_d), VMASST_TYPE_writable_pagetables)) ) { \
- if ( (_d)->arch.ptwr[PTWR_PT_ACTIVE].l1va ) \
- ptwr_flush((_d), PTWR_PT_ACTIVE); \
- if ( (_d)->arch.ptwr[PTWR_PT_INACTIVE].l1va ) \
- ptwr_flush((_d), PTWR_PT_INACTIVE); \
+ if ( likely(VM_ASSIST((_d), VMASST_TYPE_writable_pagetables)) ) \
+ { \
+ if ( likely(!shadow_mode_enabled(_d)) ) \
+ { \
+ if ( (_d)->arch.ptwr[PTWR_PT_ACTIVE].l1va ) \
+ ptwr_flush((_d), PTWR_PT_ACTIVE); \
+ if ( (_d)->arch.ptwr[PTWR_PT_INACTIVE].l1va ) \
+ ptwr_flush((_d), PTWR_PT_INACTIVE); \
+ } \
+ else \
+ shadow_sync_all(_d); \
} \
} while ( 0 )
#ifndef NDEBUG
-#define AUDIT_ALREADY_LOCKED ( 1u << 0 )
-#define AUDIT_ERRORS_OK ( 1u << 1 )
-#define AUDIT_QUIET ( 1u << 2 )
+#define AUDIT_SHADOW_ALREADY_LOCKED ( 1u << 0 )
+#define AUDIT_ERRORS_OK ( 1u << 1 )
+#define AUDIT_QUIET ( 1u << 2 )
void _audit_domain(struct domain *d, int flags);
#define audit_domain(_d) _audit_domain((_d), AUDIT_ERRORS_OK)
#ifndef __ASSEMBLY__
typedef struct { unsigned long pt_lo; } pagetable_t;
#define pagetable_val(_x) ((_x).pt_lo)
+#define pagetable_get_pfn(_x) ((_x).pt_lo >> PAGE_SHIFT)
#define mk_pagetable(_x) ( (pagetable_t) { (_x) } )
#endif
#define _PAGE_PAT 0x080UL
#define _PAGE_PSE 0x080UL
#define _PAGE_GLOBAL 0x100UL
+#define _PAGE_AVAIL 0xe00UL
#define __PAGE_HYPERVISOR \
(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
#define SHM_enable (1<<0) /* we're in one of the shadow modes */
-#define SHM_log_dirty (1<<1) /* enable log dirty mode */
-#define SHM_translate (1<<2) /* do p2m tranaltion on guest tables */
-#define SHM_external (1<<3) /* external page table, not used by Xen */
+#define SHM_refcounts (1<<1) /* refcounts based on shadow tables instead of
+ guest tables */
+#define SHM_write_all (1<<2) /* allow write access to all guest pt pages,
+ regardless of pte write permissions */
+#define SHM_log_dirty (1<<3) /* enable log dirty mode */
+#define SHM_translate (1<<4) /* do p2m translation on guest tables */
+#define SHM_external (1<<5) /* external page table, not used by Xen */
#define shadow_mode_enabled(_d) ((_d)->arch.shadow_mode)
+#define shadow_mode_refcounts(_d) ((_d)->arch.shadow_mode & SHM_refcounts)
+#define shadow_mode_write_all(_d) ((_d)->arch.shadow_mode & SHM_write_all)
#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
#define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
#define shadow_mode_external(_d) ((_d)->arch.shadow_mode & SHM_external)
extern void __shadow_sync_all(struct domain *d);
extern int __shadow_out_of_sync(struct exec_domain *ed, unsigned long va);
extern int set_p2m_entry(
- struct domain *d, unsigned long pfn, unsigned long mfn);
+ struct domain *d, unsigned long pfn, unsigned long mfn,
+ struct map_dom_mem_cache *l2cache,
+ struct map_dom_mem_cache *l1cache);
+extern void remove_shadow(struct domain *d, unsigned long gpfn, u32 stype);
+
+extern void shadow_l1_normal_pt_update(struct domain *d,
+ unsigned long pa, l1_pgentry_t l1e,
+ struct map_dom_mem_cache *cache);
+extern void shadow_l2_normal_pt_update(struct domain *d,
+ unsigned long pa, l2_pgentry_t l2e,
+ struct map_dom_mem_cache *cache);
+#ifdef __x86_64__
+extern void shadow_l3_normal_pt_update(struct domain *d,
+ unsigned long pa, l3_pgentry_t l3e,
+ struct map_dom_mem_cache *cache);
+extern void shadow_l4_normal_pt_update(struct domain *d,
+ unsigned long pa, l4_pgentry_t l4e,
+ struct map_dom_mem_cache *cache);
+#endif
+extern int shadow_do_update_va_mapping(unsigned long va,
+ l1_pgentry_t val,
+ struct exec_domain *ed);
+
static inline unsigned long __shadow_status(
struct domain *d, unsigned long gpfn, unsigned long stype);
static inline int page_is_page_table(struct pfn_info *page)
{
- return page->count_info & PGC_page_table;
+ struct domain *owner = page_get_owner(page);
+
+ /* Heavy-weight (refcounting) shadows track page-table-ness via the
+ * PGC_page_table bit; otherwise fall back to the guest-based page type. */
+ if ( owner && shadow_mode_refcounts(owner) )
+ return page->count_info & PGC_page_table;
+
+ u32 type_info = page->u.inuse.type_info & PGT_type_mask;
+ return type_info && (type_info <= PGT_l4_page_table);
}
static inline int mfn_is_page_table(unsigned long mfn)
if ( !pfn_valid(mfn) )
return 0;
- return frame_table[mfn].count_info & PGC_page_table;
+ return page_is_page_table(pfn_to_page(mfn));
}
static inline int page_out_of_sync(struct pfn_info *page)
if ( !pfn_valid(mfn) )
return 0;
- return frame_table[mfn].count_info & PGC_out_of_sync;
+ return page_out_of_sync(pfn_to_page(mfn));
}
: (mfn) )
#define __gpfn_to_mfn(_d, gpfn) \
- ( (shadow_mode_translate(_d)) \
- ? ({ ASSERT(current->domain == (_d)); \
- phys_to_machine_mapping(gpfn); }) \
- : (gpfn) )
+ ({ \
+ ASSERT(current->domain == (_d)); \
+ (shadow_mode_translate(_d)) \
+ ? phys_to_machine_mapping(gpfn) \
+ : (gpfn); \
+ })
#define __gpfn_to_mfn_foreign(_d, gpfn) \
( (shadow_mode_translate(_d)) \
#if SHADOW_DEBUG
extern int shadow_status_noswap;
+#define _SHADOW_REFLECTS_SNAPSHOT ( 9)
+#define SHADOW_REFLECTS_SNAPSHOT (1u << _SHADOW_REFLECTS_SNAPSHOT)
#endif
#ifdef VERBOSE
ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT);
+ if ( !shadow_mode_refcounts(d) )
+ return 1;
+
nl1e = l1e;
l1e_remove_flags(&nl1e, _PAGE_GLOBAL);
res = get_page_from_l1e(nl1e, d);
if ( unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) &&
- !(l1e_get_flags(l1e) & L1_DISALLOW_MASK) &&
- (mfn = l1e_get_pfn(l1e)) &&
+ !(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) &&
+ (mfn = l1e_get_pfn(nl1e)) &&
pfn_valid(mfn) &&
- (owner = page_get_owner(pfn_to_page(l1e_get_pfn(l1e)))) &&
+ (owner = page_get_owner(pfn_to_page(mfn))) &&
(d != owner) )
{
res = get_page_from_l1e(nl1e, owner);
return res;
}
+/*
+ * Ref-count wrappers for shadow code. Heavy-weight shadow modes
+ * (shadow_mode_refcounts(d) != 0) perform the underlying get/put; for
+ * light-weight modes these are no-ops, since all ref counts then remain
+ * based on the guest's page tables rather than the shadows.
+ */
+static inline void
+shadow_put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
+{
+ if ( !shadow_mode_refcounts(d) )
+ return;
+
+ put_page_from_l1e(l1e, d);
+}
+
+static inline void
+shadow_put_page_type(struct domain *d, struct pfn_info *page)
+{
+ if ( !shadow_mode_refcounts(d) )
+ return;
+
+ put_page_type(page);
+}
+
+/* Returns 1 on success; trivially succeeds for light-weight shadows. */
+static inline int shadow_get_page(struct domain *d,
+ struct pfn_info *page,
+ struct domain *owner)
+{
+ if ( !shadow_mode_refcounts(d) )
+ return 1;
+ return get_page(page, owner);
+}
+
+static inline void shadow_put_page(struct domain *d,
+ struct pfn_info *page)
+{
+ if ( !shadow_mode_refcounts(d) )
+ return;
+ put_page(page);
+}
+
+/************************************************************************/
+
+/*
+ * Record mfn as dirty in the domain's shadow log-dirty bitmap.
+ * Caller must hold d->arch.shadow_lock (asserted below).
+ * Returns 1 iff the bit was newly set, 0 if it was already set or the
+ * mfn is invalid / outside the domain's pseudo-physical map.
+ */
+static inline int __mark_dirty(struct domain *d, unsigned int mfn)
+{
+ unsigned long pfn;
+ int rc = 0;
+
+ ASSERT(spin_is_locked(&d->arch.shadow_lock));
+ ASSERT(d->arch.shadow_dirty_bitmap != NULL);
+
+ if ( !VALID_MFN(mfn) )
+ return rc;
+
+ // N.B. This doesn't use __mfn_to_gpfn().
+ // This wants the nice compact set of PFNs from 0..domain's max,
+ // which __mfn_to_gpfn() only returns for translated domains.
+ //
+ pfn = machine_to_phys_mapping[mfn];
+
+ /*
+ * Values with the MSB set denote MFNs that aren't really part of the
+ * domain's pseudo-physical memory map (e.g., the shared info frame).
+ * Nothing to do here...
+ */
+ if ( unlikely(IS_INVALID_M2P_ENTRY(pfn)) )
+ return rc;
+
+ if ( likely(pfn < d->arch.shadow_dirty_bitmap_size) )
+ {
+ /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
+ if ( !__test_and_set_bit(pfn, d->arch.shadow_dirty_bitmap) )
+ {
+ d->arch.shadow_dirty_count++;
+ rc = 1;
+ }
+ }
+#ifndef NDEBUG
+ else if ( mfn < max_page )
+ {
+ // Debug aid: a real (below max_page) mfn mapped to a pfn beyond
+ // the bitmap indicates a stale or corrupt M2P entry.
+ SH_LOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (dom %p)",
+ mfn, pfn, d->arch.shadow_dirty_bitmap_size, d);
+ SH_LOG("dom=%p caf=%08x taf=%08x",
+ page_get_owner(&frame_table[mfn]),
+ frame_table[mfn].count_info,
+ frame_table[mfn].u.inuse.type_info );
+ }
+#endif
+
+ return rc;
+}
+
+
+/* Locking wrapper around __mark_dirty() for callers without shadow_lock. */
+static inline int mark_dirty(struct domain *d, unsigned int mfn)
+{
+ int rc;
+ shadow_lock(d);
+ rc = __mark_dirty(d, mfn);
+ shadow_unlock(d);
+ return rc;
+}
+
+
/************************************************************************/
static inline void
__guest_set_l2e(
struct exec_domain *ed, unsigned long va, l2_pgentry_t value)
{
+ struct domain *d = ed->domain;
+
ed->arch.guest_vtable[l2_table_offset(va)] = value;
- if ( unlikely(shadow_mode_translate(ed->domain)) )
+ if ( unlikely(shadow_mode_translate(d)) )
update_hl2e(ed, va);
+
+ if ( unlikely(shadow_mode_log_dirty(d)) )
+ __mark_dirty(d, pagetable_get_pfn(ed->arch.guest_table));
}
static inline void
if ( (l1e_has_changed(&old_hl2e, &new_hl2e, _PAGE_PRESENT)) )
{
if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
- !get_page(pfn_to_page(l1e_get_pfn(new_hl2e)), ed->domain) )
+ !shadow_get_page(ed->domain, pfn_to_page(l1e_get_pfn(new_hl2e)),
+ ed->domain) )
new_hl2e = l1e_empty();
if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
{
- put_page(pfn_to_page(l1e_get_pfn(old_hl2e)));
+ shadow_put_page(ed->domain, pfn_to_page(l1e_get_pfn(old_hl2e)));
need_flush = 1;
}
}
static inline void shadow_drop_references(
struct domain *d, struct pfn_info *page)
{
- if ( likely(!shadow_mode_enabled(d)) ||
+ if ( likely(!shadow_mode_refcounts(d)) ||
((page->u.inuse.type_info & PGT_count_mask) == 0) )
return;
static inline void shadow_sync_and_drop_references(
struct domain *d, struct pfn_info *page)
{
- if ( likely(!shadow_mode_enabled(d)) )
+ if ( likely(!shadow_mode_refcounts(d)) )
return;
shadow_lock(d);
}
-/************************************************************************/
-
-static inline int __mark_dirty(struct domain *d, unsigned int mfn)
-{
- unsigned long pfn;
- int rc = 0;
-
- ASSERT(spin_is_locked(&d->arch.shadow_lock));
- ASSERT(d->arch.shadow_dirty_bitmap != NULL);
-
- if ( !VALID_MFN(mfn) )
- return rc;
-
- pfn = __mfn_to_gpfn(d, mfn);
-
- /*
- * Values with the MSB set denote MFNs that aren't really part of the
- * domain's pseudo-physical memory map (e.g., the shared info frame).
- * Nothing to do here...
- */
- if ( unlikely(IS_INVALID_M2P_ENTRY(pfn)) )
- return rc;
-
- if ( likely(pfn < d->arch.shadow_dirty_bitmap_size) )
- {
- /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
- if ( !__test_and_set_bit(pfn, d->arch.shadow_dirty_bitmap) )
- {
- d->arch.shadow_dirty_count++;
- rc = 1;
- }
- }
-#ifndef NDEBUG
- else if ( mfn < max_page )
- {
- SH_LOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (dom %p)",
- mfn, pfn, d->arch.shadow_dirty_bitmap_size, d);
- SH_LOG("dom=%p caf=%08x taf=%08x\n",
- page_get_owner(&frame_table[mfn]),
- frame_table[mfn].count_info,
- frame_table[mfn].u.inuse.type_info );
- }
-#endif
-
- return rc;
-}
-
-
-static inline int mark_dirty(struct domain *d, unsigned int mfn)
-{
- int rc;
- shadow_lock(d);
- rc = __mark_dirty(d, mfn);
- shadow_unlock(d);
- return rc;
-}
-
-
/************************************************************************/
extern void shadow_mark_va_out_of_sync(
(_PAGE_PRESENT|_PAGE_ACCESSED)) &&
VALID_MFN(mfn = __gpfn_to_mfn(d, l1e_get_pfn(gpte))) )
{
- spte = l1e_create_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
-
+ spte = l1e_create_pfn(mfn,
+ l1e_get_flags(gpte) &
+ ~(_PAGE_GLOBAL | _PAGE_AVAIL));
+
if ( shadow_mode_log_dirty(d) ||
!(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
mfn_is_page_table(mfn) )
spde = l2e_empty();
if ( (l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) )
{
- spde = l2e_create_pfn(sl1mfn,
- l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED);
- l2e_add_flags(&gpde, _PAGE_ACCESSED); /* N.B. PDEs do not have a dirty bit. */
+ spde = l2e_create_pfn(sl1mfn,
+ (l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED)
+ & ~(_PAGE_AVAIL));
+
+ /* N.B. PDEs do not have a dirty bit. */
+ l2e_add_flags(&gpde, _PAGE_ACCESSED);
- // XXX mafetter: Hmm...
- // Shouldn't the dirty log be checked/updated here?
- // Actually, it needs to be done in this function's callers.
- //
*gpde_p = gpde;
}
l1_pgentry_t *shadow_pte_p)
{
l1_pgentry_t old_spte, new_spte;
+ int need_flush = 0;
perfc_incrc(validate_pte_calls);
-#if 0
- FSH_LOG("validate_pte(old=%lx new=%lx)", old_pte, new_pte);
-#endif
-
- old_spte = *shadow_pte_p;
l1pte_propagate_from_guest(d, new_pte, &new_spte);
- // only do the ref counting if something important changed.
- //
- if ( ((l1e_get_value(old_spte) | l1e_get_value(new_spte)) & _PAGE_PRESENT ) &&
- l1e_has_changed(&old_spte, &new_spte, _PAGE_RW | _PAGE_PRESENT) )
+ if ( shadow_mode_refcounts(d) )
{
- perfc_incrc(validate_pte_changes);
+ old_spte = *shadow_pte_p;
- if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(new_spte, d) )
- new_spte = l1e_empty();
- if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
- put_page_from_l1e(old_spte, d);
+ if ( l1e_get_value(old_spte) == l1e_get_value(new_spte) )
+ {
+ // No accounting required...
+ //
+ perfc_incrc(validate_pte_changes1);
+ }
+ else if ( l1e_get_value(old_spte) == (l1e_get_value(new_spte)|_PAGE_RW) )
+ {
+ // Fast path for PTEs that have merely been write-protected
+ // (e.g., during a Unix fork()). A strict reduction in privilege.
+ //
+ perfc_incrc(validate_pte_changes2);
+ if ( likely(l1e_get_flags(new_spte) & _PAGE_PRESENT) )
+ shadow_put_page_type(d, &frame_table[l1e_get_pfn(new_spte)]);
+ }
+ else if ( ((l1e_get_flags(old_spte) | l1e_get_flags(new_spte)) &
+ _PAGE_PRESENT ) &&
+ l1e_has_changed(&old_spte, &new_spte, _PAGE_RW | _PAGE_PRESENT) )
+ {
+ // only do the ref counting if something important changed.
+ //
+ perfc_incrc(validate_pte_changes3);
+
+ if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
+ !shadow_get_page_from_l1e(new_spte, d) )
+ new_spte = l1e_empty();
+ if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
+ {
+ shadow_put_page_from_l1e(old_spte, d);
+ need_flush = 1;
+ }
+ }
+ else
+ {
+ perfc_incrc(validate_pte_changes4);
+ }
}
*shadow_pte_p = new_spte;
- // paranoia rules!
- return 1;
+ return need_flush;
}
// returns true if a tlb flush is needed
l1_pgentry_t *shadow_hl2e_p)
{
l1_pgentry_t old_hl2e, new_hl2e;
+ int need_flush = 0;
perfc_incrc(validate_hl2e_calls);
!get_page(pfn_to_page(l1e_get_pfn(new_hl2e)), d) )
new_hl2e = l1e_empty();
if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
+ {
put_page(pfn_to_page(l1e_get_pfn(old_hl2e)));
+ need_flush = 1;
+ }
}
*shadow_hl2e_p = new_hl2e;
- // paranoia rules!
- return 1;
-
+ return need_flush;
}
// returns true if a tlb flush is needed
l2_pgentry_t *shadow_pde_p)
{
l2_pgentry_t old_spde, new_spde;
+ int need_flush = 0;
perfc_incrc(validate_pde_calls);
old_spde = *shadow_pde_p;
l2pde_propagate_from_guest(d, &new_gpde, &new_spde);
- // XXX Shouldn't we propagate the new_gpde to the guest?
- // And then mark the guest's L2 page as dirty?
-
// Only do the ref counting if something important changed.
//
if ( ((l2e_get_value(old_spde) | l2e_get_value(new_spde)) & _PAGE_PRESENT) &&
!get_shadow_ref(l2e_get_pfn(new_spde)) )
BUG();
if ( l2e_get_flags(old_spde) & _PAGE_PRESENT )
+ {
put_shadow_ref(l2e_get_pfn(old_spde));
+ need_flush = 1;
+ }
}
*shadow_pde_p = new_spde;
- // paranoia rules!
- return 1;
+ return need_flush;
}
/*********************************************************************/
{
perfc_incrc(shadow_status_shortcut);
#ifndef NDEBUG
- ASSERT(___shadow_status(d, gpfn, stype) == 0);
+ if ( ___shadow_status(d, gpfn, stype) != 0 )
+ {
+ printk("d->id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%x "
+ "mfn_out_of_sync(gmfn)=%d mfn_is_page_table(gmfn)=%d\n",
+ d->id, gpfn, gmfn, stype,
+ frame_table[gmfn].count_info,
+ frame_table[gmfn].u.inuse.type_info,
+ mfn_out_of_sync(gmfn), mfn_is_page_table(gmfn));
+ BUG();
+ }
- // Undo the affects of the above ASSERT on ___shadow_status()'s perf
- // counters.
+ // Undo the effects of the above call on ___shadow_status()'s perf
+ // counters, since that call is really just part of an assertion.
//
perfc_decrc(shadow_status_calls);
perfc_decrc(shadow_status_miss);
*
* Either returns PGT_none, or PGT_l{1,2,3,4}_page_table.
*/
-static inline unsigned long
+static inline u32
shadow_max_pgtable_type(struct domain *d, unsigned long gpfn,
unsigned long *smfn)
{
struct shadow_status *x;
- unsigned long pttype = PGT_none, type;
+ u32 pttype = PGT_none, type;
ASSERT(spin_is_locked(&d->arch.shadow_lock));
ASSERT(gpfn == (gpfn & PGT_mfn_mask));
struct exec_domain *ed = current;
struct domain *d = ed->domain;
l2_pgentry_t sl2e;
- l1_pgentry_t old_spte;
#if 0
printk("shadow_set_l1e(va=%p, new_spte=%p, create=%d)\n",
}
}
- old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
-
- // only do the ref counting if something important changed.
- //
- if ( l1e_has_changed(&old_spte, &new_spte, _PAGE_RW | _PAGE_PRESENT) )
+ if ( shadow_mode_refcounts(d) )
{
- if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(new_spte, d) )
- new_spte = l1e_empty();
- if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
- put_page_from_l1e(old_spte, d);
+ l1_pgentry_t old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
+
+ // only do the ref counting if something important changed.
+ //
+ if ( l1e_has_changed(&old_spte, &new_spte, _PAGE_RW | _PAGE_PRESENT) )
+ {
+ if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
+ !shadow_get_page_from_l1e(new_spte, d) )
+ new_spte = l1e_empty();
+ if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
+ shadow_put_page_from_l1e(old_spte, d);
+ }
}
shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
/************************************************************************/
+/*
+ * Returns 1 if the guest frame at gpfn may legitimately be written
+ * despite being (or shadowing) a page table: either via the writable
+ * page tables VM assist (L1s only) or because the domain runs in
+ * shadow write_all mode (any page-table level).
+ */
+static inline int
+shadow_mode_page_writable(struct domain *d, unsigned long gpfn)
+{
+ // NOTE(review): mfn is used to index frame_table without a
+ // pfn_valid()/VALID_MFN check -- assumes callers only pass gpfns that
+ // translate to valid mfns; confirm.
+ unsigned long mfn = __gpfn_to_mfn(d, gpfn);
+ u32 type = frame_table[mfn].u.inuse.type_info & PGT_type_mask;
+
+ // With refcounted shadows a guest page table is typed writable, so
+ // look up the max shadow type instead.
+ // NOTE(review): passes NULL for the smfn out-param -- assumes
+ // shadow_max_pgtable_type() tolerates NULL; verify.
+ if ( shadow_mode_refcounts(d) &&
+ (type == PGT_writable_page) )
+ type = shadow_max_pgtable_type(d, gpfn, NULL);
+
+ if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
+ (type == PGT_l1_page_table) )
+ return 1;
+
+ if ( shadow_mode_write_all(d) &&
+ type && (type <= PGT_l4_page_table) )
+ return 1;
+
+ return 0;
+}
+
static inline l1_pgentry_t gva_to_gpte(unsigned long gva)
{
l2_pgentry_t gpde;
*/
extern void unmap_domain_mem(void *va);
+/*
+ * One-entry cache over map_domain_mem()/unmap_domain_mem(), so repeated
+ * accesses to the same machine page reuse a single mapping. Bit 0 of
+ * cache->pa serves as the "slot valid" tag (real pa values are
+ * page-aligned, so the bit is otherwise unused); it is masked back off
+ * the cached va before forming the returned pointer.
+ */
+struct map_dom_mem_cache {
+ unsigned long pa;
+ void *va;
+};
+
+#define MAP_DOM_MEM_CACHE_INIT { .pa = 0 }
+
+static inline void *
+map_domain_mem_with_cache(unsigned long pa,
+ struct map_dom_mem_cache *cache)
+{
+ if ( likely(cache != NULL) )
+ {
+ if ( likely(cache->pa) )
+ {
+ // Cache hit: same machine page as last time.
+ if ( likely((pa & PAGE_MASK) == (cache->pa & PAGE_MASK)) )
+ goto done;
+ unmap_domain_mem(cache->va);
+ }
+ // Cache miss: map the new page and tag the slot valid (|1).
+ cache->pa = (pa & PAGE_MASK) | 1;
+ cache->va = map_domain_mem(cache->pa);
+ done:
+ return (void *)(((unsigned long)cache->va & PAGE_MASK) |
+ (pa & ~PAGE_MASK));
+ }
+
+ // No cache supplied: fall through to an uncached mapping.
+ return map_domain_mem(pa);
+}
+
+/*
+ * Counterpart to map_domain_mem_with_cache(): with a cache the unmap is
+ * deferred until unmap_domain_mem_cache(); without one, unmap now.
+ */
+static inline void
+unmap_domain_mem_with_cache(void *va,
+ struct map_dom_mem_cache *cache)
+{
+ if ( unlikely(!cache) )
+ unmap_domain_mem(va);
+}
+
+/* Flush the cache: drop any still-held mapping and mark the slot empty. */
+static inline void
+unmap_domain_mem_cache(struct map_dom_mem_cache *cache)
+{
+ if ( likely(cache != NULL) && likely(cache->pa) )
+ {
+ unmap_domain_mem(cache->va);
+ cache->pa = 0;
+ }
+}
+
+
#endif /* __ASM_DOMAIN_PAGE_H__ */
#define BUG_ON(_p) do { if (_p) BUG(); } while ( 0 )
#ifndef NDEBUG
-#define ASSERT(_p) if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s\n", #_p , __LINE__, __FILE__); BUG(); }
+/* do-while(0) (as in BUG_ON above) so ASSERT(x); is a single statement:
+ * bare { ... } would break "if (c) ASSERT(x); else ..." -- the semicolon
+ * after the closing brace orphans the else. */
+#define ASSERT(_p) do { if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s\n", #_p , __LINE__, __FILE__); BUG(); } } while ( 0 )
#else
#define ASSERT(_p) ((void)0)
#endif
PERFCOUNTER_CPU(shadow_make_snapshot, "snapshots created")
PERFCOUNTER_CPU(shadow_mark_mfn_out_of_sync_calls, "calls to shadow_mk_out_of_sync")
PERFCOUNTER_CPU(shadow_out_of_sync_calls, "calls to shadow_out_of_sync")
-PERFCOUNTER_CPU(extra_va_update_sync, "extra syncs for bug in chk_pgtb")
PERFCOUNTER_CPU(snapshot_entry_matches_calls, "calls to ss_entry_matches")
PERFCOUNTER_CPU(snapshot_entry_matches_true, "ss_entry_matches returns true")
PERFCOUNTER_CPU(validate_pte_calls, "calls to validate_pte_change")
-PERFCOUNTER_CPU(validate_pte_changes, "validate_pte makes changes")
+PERFCOUNTER_CPU(validate_pte_changes1, "validate_pte makes changes1")
+PERFCOUNTER_CPU(validate_pte_changes2, "validate_pte makes changes2")
+PERFCOUNTER_CPU(validate_pte_changes3, "validate_pte makes changes3")
+PERFCOUNTER_CPU(validate_pte_changes4, "validate_pte makes changes4")
PERFCOUNTER_CPU(validate_pde_calls, "calls to validate_pde_change")
PERFCOUNTER_CPU(validate_pde_changes, "validate_pde makes changes")
PERFCOUNTER_CPU(shadow_get_page_fail, "shadow_get_page_from_l1e fails" )